### Run this cell before continuing.
import altair as alt
import numpy as np
import pandas as pd
from sklearn import set_config
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Simplify working with large datasets in Altair
alt.data_transformers.disable_max_rows()
# Output dataframes instead of arrays
set_config(transform_output="pandas")
Worksheet - Linear Regression
Learning objectives
- Recognize situations where a simple regression analysis would be appropriate for making predictions.
- Perform ordinary least squares regression in Python using scikit-learn to predict the values for a test dataset.
- Use Python to fit simple and multivariable linear regression models on training data.
- Evaluate the linear regression model on test data.
This worksheet covers parts of Chapter 8 of the online textbook. You should read this chapter to gain a better understanding of this assignment. Any place you see ___, you must fill in the function, variable, or data to complete the code. Replace the raise NotImplementedError line with your completed code and answers, then run the cell.
Marathon Training with Linear Regression!
Source: https://media.giphy.com/media/BDagLpxFIm3SM/giphy.gif
Question: what features predict whether athletes will perform better than others? Specifically, we are interested in marathon runners, and in how the maximum distance run per week during training predicts the time it takes a runner to finish the race.
This time around, however, we will analyze the data using simple linear regression rather than \(k\)-nn regression. In the end, we will compare our results to what we found with \(k\)-nn regression.
Question 1.0
{points: 1}
Load the marathon data from the data/ folder and assign it to an object called marathon.
### BEGIN SOLUTION
# Read the marathon dataset; the path is relative to the notebook's working directory.
marathon = pd.read_csv("data/marathon.csv")
### END SOLUTION
marathon
| | age | bmi | female | footwear | group | injury | mf_d | mf_di | mf_ti | max | sprint | mf_s | time_hrs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 35 | 23.592323 | 0 | 2 | 1 | 2 | 42195 | 4 | 10295 | 60.0 | 1 | 4.098592 | 2.859722 |
| 1 | 33 | 22.518295 | 0 | 2 | 2 | 2 | 42195 | 3 | 12292 | 50.0 | 0 | 3.432720 | 3.414444 |
| 2 | 38 | 25.560312 | 0 | 2 | 3 | 1 | 42195 | 4 | 10980 | 65.0 | 0 | 3.842896 | 3.050000 |
| 3 | 34 | 22.607931 | 0 | 2 | 1 | 1 | 42195 | 3 | 10694 | 88.0 | 1 | 3.945670 | 2.970556 |
| 4 | 39 | 24.974836 | 0 | 2 | 1 | 1 | 42195 | 2 | 13452 | 51.0 | 0 | 3.136708 | 3.736667 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 924 | 23 | 23.277760 | 1 | 2 | 2 | 1 | 42195 | 3 | 15660 | 18.0 | 0 | 2.694444 | 4.350000 |
| 925 | 30 | 24.489796 | 0 | 2 | 2 | 1 | 42195 | 2 | 16110 | 45.0 | 0 | 2.619181 | 4.475000 |
| 926 | 44 | 24.237617 | 0 | 2 | 3 | 1 | 42195 | 2 | 12289 | 63.0 | 1 | 3.433558 | 3.413611 |
| 927 | 34 | 21.249750 | 0 | 2 | 3 | 1 | 42195 | 3 | 12602 | 32.0 | 0 | 3.348278 | 3.500556 |
| 928 | 41 | 25.204788 | 0 | 2 | 1 | 1 | 42195 | 2 | 13500 | 20.0 | 1 | 3.125556 | 3.750000 |
929 rows × 13 columns
from hashlib import sha1
# Autograder checks for Question 1.0. Each assert converts a value to str,
# appends a per-check salt, and compares the SHA-1 hex digest with a stored
# digest (presumably so the expected answers are not readable in plain text).

# `marathon is None` and the type of `marathon`
assert sha1(str(type(marathon is None)).encode("utf-8")+b"739e2").hexdigest() == "6e92f9a4e4c3ee38226eb47ccf55024c5f32aa31", "type of marathon is None is not bool. marathon is None should be a bool"
assert sha1(str(marathon is None).encode("utf-8")+b"739e2").hexdigest() == "59296bffcfb7bcd365f6c79ebfd8e5a40df62b2f", "boolean value of marathon is None is not correct"
assert sha1(str(type(marathon)).encode("utf-8")+b"739e3").hexdigest() == "bff0380c6a360860e46c818fdc334b2bcfeb5adb", "type of type(marathon) is not correct"
# marathon.shape: its type, length, values (order-insensitive), then exact order
assert sha1(str(type(marathon.shape)).encode("utf-8")+b"739e4").hexdigest() == "1eada99c236028fcff78fd6303354632c2cb5e62", "type of marathon.shape is not tuple. marathon.shape should be a tuple"
assert sha1(str(len(marathon.shape)).encode("utf-8")+b"739e4").hexdigest() == "2e34911a50836d3cf47d13ee702a774b05b66acf", "length of marathon.shape is not correct"
assert sha1(str(sorted(map(str, marathon.shape))).encode("utf-8")+b"739e4").hexdigest() == "07d7c57045b521db23e59bcd29a30b64f750e408", "values of marathon.shape are not correct"
assert sha1(str(marathon.shape).encode("utf-8")+b"739e4").hexdigest() == "c1b0dafb0e4d543a159d3cef1509b02336267aef", "order of elements of marathon.shape is not correct"
# Membership of the "time_hrs" and "max" columns
assert sha1(str(type("time_hrs" in marathon.columns)).encode("utf-8")+b"739e5").hexdigest() == "b804c528bed5bf422960fd5c803d8d8689b2cdd6", "type of \"time_hrs\" in marathon.columns is not bool. \"time_hrs\" in marathon.columns should be a bool"
assert sha1(str("time_hrs" in marathon.columns).encode("utf-8")+b"739e5").hexdigest() == "65ee06ccfc8ecf0d0497cf6b3322c74797040486", "boolean value of \"time_hrs\" in marathon.columns is not correct"
assert sha1(str(type("max" in marathon.columns)).encode("utf-8")+b"739e6").hexdigest() == "73f7cbaa58ac0ae4fcf13fdb39b6a596d0a9e637", "type of \"max\" in marathon.columns is not bool. \"max\" in marathon.columns should be a bool"
assert sha1(str("max" in marathon.columns).encode("utf-8")+b"739e6").hexdigest() == "11b4588727f7e1cd86d42c8e61e257ddf72cb9cf", "boolean value of \"max\" in marathon.columns is not correct"
# Column totals of "max" and "time_hrs", rounded to zero decimal places before hashing
assert sha1(str(type(round(sum(marathon['max']), 0))).encode("utf-8")+b"739e7").hexdigest() == "cb0dcf331ef1975fca37d4d6dc1e453b08bf2a33", "type of round(sum(marathon['max']), 0) is not float. Please make sure it is float and not np.float64, etc. You can cast your value into a float using float()"
assert sha1(str(round(round(sum(marathon['max']), 0), 2)).encode("utf-8")+b"739e7").hexdigest() == "0c743d369241ee5ae9d99436f698cf6105d89029", "value of round(sum(marathon['max']), 0) is not correct (rounded to 2 decimal places)"
assert sha1(str(type(round(sum(marathon['time_hrs']), 0))).encode("utf-8")+b"739e8").hexdigest() == "bc74cad374cdd1f333d8f0dbdd3e8f40872b3827", "type of round(sum(marathon['time_hrs']), 0) is not float. Please make sure it is float and not np.float64, etc. You can cast your value into a float using float()"
assert sha1(str(round(round(sum(marathon['time_hrs']), 0), 2)).encode("utf-8")+b"739e8").hexdigest() == "014c3d62ed4ff997c449ec3c8c8a4ee9a2353073", "value of round(sum(marathon['time_hrs']), 0) is not correct (rounded to 2 decimal places)"
print('Success!')
Success!
Question 1.1
{points: 1}
Similar to what we have been doing, we will first split the dataset into the training and testing datasets, using 75% of the original data as the training data. Remember, we will be putting the test dataset away in a ‘lock box’ that we will come back to later after we choose our final model. Assign your training dataset to an object named marathon_training and your testing dataset to an object named marathon_testing.
Next, set the time_hrs as the target (y) and max as the feature (X). Store the features as X_train and X_test and targets as y_train and y_test respectively for the marathon_training and marathon_testing.
Assign the objects to marathon_training, marathon_testing, X_train, y_train, X_test and y_test respectively.
# ___, ___ = train_test_split(
# ___,
# test_size=___,
# random_state=2000, # Do not change the random_state
# )
# X_train = ___[___] # A single column data frame
# y_train = ___[___] # A series
# X_test = ___[___] # A single column data frame
# y_test = ___[___] # A series
### BEGIN SOLUTION
# Hold out 25% of the rows for testing; the remaining 75% is the training set.
marathon_training, marathon_testing = train_test_split(
    marathon, test_size=0.25, random_state=2000  # Do not change the random_state
)
# Selecting with a list (["max"]) keeps the feature as a single-column data
# frame; selecting with a plain label ("time_hrs") gives the target as a series.
X_train, y_train = marathon_training[["max"]], marathon_training["time_hrs"]
X_test, y_test = marathon_testing[["max"]], marathon_testing["time_hrs"]
### END SOLUTION
from hashlib import sha1
# Autograder checks for Question 1.1. Each assert converts a value to str,
# appends a per-check salt, and compares the SHA-1 hex digest with a stored
# digest (presumably so the expected answers are not readable in plain text).

# marathon_training: `is None`, shape (type, length, values, order), and sum of age
assert sha1(str(type(marathon_training is None)).encode("utf-8")+b"4f6ad").hexdigest() == "f674e38d6fdcb566897037d6bd60a47404feeb21", "type of marathon_training is None is not bool. marathon_training is None should be a bool"
assert sha1(str(marathon_training is None).encode("utf-8")+b"4f6ad").hexdigest() == "6fa797429392a5d615e12d87d6c270c224b62e87", "boolean value of marathon_training is None is not correct"
assert sha1(str(type(marathon_training.shape)).encode("utf-8")+b"4f6ae").hexdigest() == "c65c0f879cf7d4bd9e072e641450bd00c675daa9", "type of marathon_training.shape is not tuple. marathon_training.shape should be a tuple"
assert sha1(str(len(marathon_training.shape)).encode("utf-8")+b"4f6ae").hexdigest() == "33b155dbfcff513325406a6efb32adad20db7a02", "length of marathon_training.shape is not correct"
assert sha1(str(sorted(map(str, marathon_training.shape))).encode("utf-8")+b"4f6ae").hexdigest() == "30d227d9ac1ed57ba85872a2a07b6117d273c251", "values of marathon_training.shape are not correct"
assert sha1(str(marathon_training.shape).encode("utf-8")+b"4f6ae").hexdigest() == "bb16852ef07255cdb14cab3907de9fdd370231a4", "order of elements of marathon_training.shape is not correct"
assert sha1(str(type(sum(marathon_training.age))).encode("utf-8")+b"4f6af").hexdigest() == "ba50b4a4b9e30582e162d71f7e94533f0cd2670e", "type of sum(marathon_training.age) is not int. Please make sure it is int and not np.int64, etc. You can cast your value into an int using int()"
assert sha1(str(sum(marathon_training.age)).encode("utf-8")+b"4f6af").hexdigest() == "97e4849e109b18815b375aabbc4a675bf8b572f8", "value of sum(marathon_training.age) is not correct"
# marathon_testing: same set of checks as for marathon_training
assert sha1(str(type(marathon_testing is None)).encode("utf-8")+b"4f6b0").hexdigest() == "874282999c909b82564d7ce122d4cc20808ae07d", "type of marathon_testing is None is not bool. marathon_testing is None should be a bool"
assert sha1(str(marathon_testing is None).encode("utf-8")+b"4f6b0").hexdigest() == "ca991e955d46513ca9dc43cb7faad1510405e6e2", "boolean value of marathon_testing is None is not correct"
assert sha1(str(type(marathon_testing.shape)).encode("utf-8")+b"4f6b1").hexdigest() == "3827f444c705a95782a04332415cd5d97f324216", "type of marathon_testing.shape is not tuple. marathon_testing.shape should be a tuple"
assert sha1(str(len(marathon_testing.shape)).encode("utf-8")+b"4f6b1").hexdigest() == "906078a9f2abf4de01fcf0daf6461d5788643f77", "length of marathon_testing.shape is not correct"
assert sha1(str(sorted(map(str, marathon_testing.shape))).encode("utf-8")+b"4f6b1").hexdigest() == "2cc1bdf059f9d6117e78f7d6f6e5821ec9fc1bdb", "values of marathon_testing.shape are not correct"
assert sha1(str(marathon_testing.shape).encode("utf-8")+b"4f6b1").hexdigest() == "5a1c8ea5fc9a728dee1c0b720c690e0a130064e2", "order of elements of marathon_testing.shape is not correct"
assert sha1(str(type(sum(marathon_testing.age))).encode("utf-8")+b"4f6b2").hexdigest() == "e7a8984d43f4d32c814e54d4358442ad0a96003c", "type of sum(marathon_testing.age) is not int. Please make sure it is int and not np.int64, etc. You can cast your value into an int using int()"
assert sha1(str(sum(marathon_testing.age)).encode("utf-8")+b"4f6b2").hexdigest() == "34fa963b05468045a3dfe6fa84cff7cdcc47d488", "value of sum(marathon_testing.age) is not correct"
# X_train: column names and shape
assert sha1(str(type(X_train.columns.values)).encode("utf-8")+b"4f6b3").hexdigest() == "58cab9eda29ff2b2d6749caed7393fbdc6843f76", "type of X_train.columns.values is not correct"
assert sha1(str(X_train.columns.values).encode("utf-8")+b"4f6b3").hexdigest() == "569ca1bdf2aee914b88e0cd7bf39a6fb4701d65a", "value of X_train.columns.values is not correct"
assert sha1(str(type(X_train.shape)).encode("utf-8")+b"4f6b4").hexdigest() == "c6004691c652e546af1b6b67844b4e846344c1d6", "type of X_train.shape is not tuple. X_train.shape should be a tuple"
assert sha1(str(len(X_train.shape)).encode("utf-8")+b"4f6b4").hexdigest() == "b5e0ccffd5b8cbe7ba6731bff5c23dab8c0f4590", "length of X_train.shape is not correct"
assert sha1(str(sorted(map(str, X_train.shape))).encode("utf-8")+b"4f6b4").hexdigest() == "1e3e3ec67d9ab5e7ee523bab8f560ee2549bc591", "values of X_train.shape are not correct"
assert sha1(str(X_train.shape).encode("utf-8")+b"4f6b4").hexdigest() == "010ca15c9c455f5f34e84a6483db2b38abbf6bc0", "order of elements of X_train.shape is not correct"
# y_train: series name (case-insensitive, then exact) and shape
assert sha1(str(type(y_train.name)).encode("utf-8")+b"4f6b5").hexdigest() == "dffb93a65b9c15090a1dac65485392d02dd3b05e", "type of y_train.name is not str. y_train.name should be an str"
assert sha1(str(len(y_train.name)).encode("utf-8")+b"4f6b5").hexdigest() == "0def0bf84e8da55beab10cd1fbfd9f7fbd31a1cc", "length of y_train.name is not correct"
assert sha1(str(y_train.name.lower()).encode("utf-8")+b"4f6b5").hexdigest() == "9ede03ef4c98e99b336e0465df700ab86f738d01", "value of y_train.name is not correct"
assert sha1(str(y_train.name).encode("utf-8")+b"4f6b5").hexdigest() == "9ede03ef4c98e99b336e0465df700ab86f738d01", "correct string value of y_train.name but incorrect case of letters"
assert sha1(str(type(y_train.shape)).encode("utf-8")+b"4f6b6").hexdigest() == "14ef9f53aed59146615896c647d72fdee84a030d", "type of y_train.shape is not tuple. y_train.shape should be a tuple"
assert sha1(str(len(y_train.shape)).encode("utf-8")+b"4f6b6").hexdigest() == "bcfdddcab49f87ca8545d798e2d70e638926a45e", "length of y_train.shape is not correct"
assert sha1(str(sorted(map(str, y_train.shape))).encode("utf-8")+b"4f6b6").hexdigest() == "9149d98edddfb638a4de90d61ed99ae9884bb8a7", "values of y_train.shape are not correct"
assert sha1(str(y_train.shape).encode("utf-8")+b"4f6b6").hexdigest() == "b7502ed93b7fb6823b51c2982622b915a4fb1041", "order of elements of y_train.shape is not correct"
# X_test: column names and shape
assert sha1(str(type(X_test.columns.values)).encode("utf-8")+b"4f6b7").hexdigest() == "4474a8a8bfdfc8b012d956db8931980b76744722", "type of X_test.columns.values is not correct"
assert sha1(str(X_test.columns.values).encode("utf-8")+b"4f6b7").hexdigest() == "b2549900463f19a702b41da70c671104c0e8b682", "value of X_test.columns.values is not correct"
assert sha1(str(type(X_test.shape)).encode("utf-8")+b"4f6b8").hexdigest() == "4c2da79bd29e66ef3fbb291d5188459fc5c4706f", "type of X_test.shape is not tuple. X_test.shape should be a tuple"
assert sha1(str(len(X_test.shape)).encode("utf-8")+b"4f6b8").hexdigest() == "ecd604671069f62e45124763ec88fc80d7b82498", "length of X_test.shape is not correct"
assert sha1(str(sorted(map(str, X_test.shape))).encode("utf-8")+b"4f6b8").hexdigest() == "3bf5695fc56458e4405a6f88a4f5506f5d901cb8", "values of X_test.shape are not correct"
assert sha1(str(X_test.shape).encode("utf-8")+b"4f6b8").hexdigest() == "352788e35c8d809802def81e35c175258a93bd69", "order of elements of X_test.shape is not correct"
# y_test: series name (case-insensitive, then exact) and shape
assert sha1(str(type(y_test.name)).encode("utf-8")+b"4f6b9").hexdigest() == "4b81d94ef69cd62c1396789593bbee6e8a5fa1a5", "type of y_test.name is not str. y_test.name should be an str"
assert sha1(str(len(y_test.name)).encode("utf-8")+b"4f6b9").hexdigest() == "f9f25f4c81c55229317166657fe150f016b21d38", "length of y_test.name is not correct"
assert sha1(str(y_test.name.lower()).encode("utf-8")+b"4f6b9").hexdigest() == "b708b2f0439fe252df818bf470eee84eb19da428", "value of y_test.name is not correct"
assert sha1(str(y_test.name).encode("utf-8")+b"4f6b9").hexdigest() == "b708b2f0439fe252df818bf470eee84eb19da428", "correct string value of y_test.name but incorrect case of letters"
assert sha1(str(type(y_test.shape)).encode("utf-8")+b"4f6ba").hexdigest() == "f18361d9de35bec90f44b45ee43725ea9777275c", "type of y_test.shape is not tuple. y_test.shape should be a tuple"
assert sha1(str(len(y_test.shape)).encode("utf-8")+b"4f6ba").hexdigest() == "e2309e5b908c5857ae791835ba17a8025f1c4312", "length of y_test.shape is not correct"
assert sha1(str(sorted(map(str, y_test.shape))).encode("utf-8")+b"4f6ba").hexdigest() == "89c5cd5a88c20980c89973a438767b9f8a88ce3e", "values of y_test.shape are not correct"
assert sha1(str(y_test.shape).encode("utf-8")+b"4f6ba").hexdigest() == "78ec5b8e1118ac59272fb5d5fdde68b526f3e166", "order of elements of y_test.shape is not correct"
print('Success!')
Success!
Question 1.2
{points: 1}
Using only the observations in the training dataset, create a scatterplot to assess the relationship between race time (time_hrs) and maximum distance ran per week during training (max). Put time_hrs on the y-axis and max on the x-axis. Use mark_point and remember to do whatever is necessary to make this an effective visualization, including addressing overplotting in a suitable manner.
Assign this plot to an object called marathon_scatter.
### BEGIN SOLUTION
# Scatter plot of race time against weekly maximum training distance, built
# from the training split only. Semi-transparent points address overplotting,
# and the y scale is not forced to include zero.
marathon_scatter = (
    alt.Chart(marathon_training)
    .mark_point(opacity=0.4)
    .encode(
        x=alt.X("max", title="Max Distance Ran per Week During Training (miles)"),
        y=alt.Y("time_hrs", title="Race Time (hours)").scale(zero=False),
    )
)
### END SOLUTION
marathon_scatter
from hashlib import sha1
# Autograder checks for Question 1.2. Each assert converts a value to str,
# appends a per-check salt, and compares the SHA-1 hex digest with a stored
# digest (presumably so the expected answers are not readable in plain text).

# `marathon_scatter is None`
assert sha1(str(type(marathon_scatter is None)).encode("utf-8")+b"6986b").hexdigest() == "be72f6a83cd9482319f43f4b049fdd940e1f4c6d", "type of marathon_scatter is None is not bool. marathon_scatter is None should be a bool"
assert sha1(str(marathon_scatter is None).encode("utf-8")+b"6986b").hexdigest() == "79682770c2f1c151e70550446c11cc299f292dd9", "boolean value of marathon_scatter is None is not correct"
# Field shorthand of the x encoding (case-insensitive, then exact)
assert sha1(str(type(marathon_scatter.encoding.x['shorthand'])).encode("utf-8")+b"6986c").hexdigest() == "6d3c30ce678b314024b4d80b82e5ed9697c02ce0", "type of marathon_scatter.encoding.x['shorthand'] is not str. marathon_scatter.encoding.x['shorthand'] should be an str"
assert sha1(str(len(marathon_scatter.encoding.x['shorthand'])).encode("utf-8")+b"6986c").hexdigest() == "0d12f86dda5f48bc1e5d3675345950ca7a48c4e7", "length of marathon_scatter.encoding.x['shorthand'] is not correct"
assert sha1(str(marathon_scatter.encoding.x['shorthand'].lower()).encode("utf-8")+b"6986c").hexdigest() == "8630293695025e3d22cab897d7c35cb472b47b6a", "value of marathon_scatter.encoding.x['shorthand'] is not correct"
assert sha1(str(marathon_scatter.encoding.x['shorthand']).encode("utf-8")+b"6986c").hexdigest() == "8630293695025e3d22cab897d7c35cb472b47b6a", "correct string value of marathon_scatter.encoding.x['shorthand'] but incorrect case of letters"
# Field shorthand of the y encoding (case-insensitive, then exact)
assert sha1(str(type(marathon_scatter.encoding.y['shorthand'])).encode("utf-8")+b"6986d").hexdigest() == "2d41e0469437832c726023d3cf4e14159bd09b4c", "type of marathon_scatter.encoding.y['shorthand'] is not str. marathon_scatter.encoding.y['shorthand'] should be an str"
assert sha1(str(len(marathon_scatter.encoding.y['shorthand'])).encode("utf-8")+b"6986d").hexdigest() == "d931044a5378994c5f2df4ddb4637dbabf5d23f6", "length of marathon_scatter.encoding.y['shorthand'] is not correct"
assert sha1(str(marathon_scatter.encoding.y['shorthand'].lower()).encode("utf-8")+b"6986d").hexdigest() == "c3e46f0aece8b220b81d7e8ea490b10e5265e671", "value of marathon_scatter.encoding.y['shorthand'] is not correct"
assert sha1(str(marathon_scatter.encoding.y['shorthand']).encode("utf-8")+b"6986d").hexdigest() == "c3e46f0aece8b220b81d7e8ea490b10e5265e671", "correct string value of marathon_scatter.encoding.y['shorthand'] but incorrect case of letters"
# Mark type of the chart (case-insensitive, then exact)
assert sha1(str(type(marathon_scatter.mark.type)).encode("utf-8")+b"6986e").hexdigest() == "25bf70dd77143087753edd66a74bdbdab609b641", "type of marathon_scatter.mark.type is not str. marathon_scatter.mark.type should be an str"
assert sha1(str(len(marathon_scatter.mark.type)).encode("utf-8")+b"6986e").hexdigest() == "64843ac47bf38de071408c16ebd821f565913dc5", "length of marathon_scatter.mark.type is not correct"
assert sha1(str(marathon_scatter.mark.type.lower()).encode("utf-8")+b"6986e").hexdigest() == "0c5b2e8e01caf2686729e9e986b2e067d2f5f42c", "value of marathon_scatter.mark.type is not correct"
assert sha1(str(marathon_scatter.mark.type).encode("utf-8")+b"6986e").hexdigest() == "0c5b2e8e01caf2686729e9e986b2e067d2f5f42c", "correct string value of marathon_scatter.mark.type but incorrect case of letters"
# Number of rows in the data attached to the chart
assert sha1(str(type(marathon_scatter.data.shape[0])).encode("utf-8")+b"6986f").hexdigest() == "fb072f8df94e039c392391cbcbafbdcbb5f6b693", "type of marathon_scatter.data.shape[0] is not int. Please make sure it is int and not np.int64, etc. You can cast your value into an int using int()"
assert sha1(str(marathon_scatter.data.shape[0]).encode("utf-8")+b"6986f").hexdigest() == "b7c9c137f9bca9116ce605daa01dcd1c0edf9ec4", "value of marathon_scatter.data.shape[0] is not correct"
# Whether an 'opacity' key appears in the mark definition
assert sha1(str(type('opacity' in marathon_scatter.mark.to_dict())).encode("utf-8")+b"69870").hexdigest() == "dd9e9a03b10d76cac8120e773139b76b99491ff9", "type of 'opacity' in marathon_scatter.mark.to_dict() is not bool. 'opacity' in marathon_scatter.mark.to_dict() should be a bool"
assert sha1(str('opacity' in marathon_scatter.mark.to_dict()).encode("utf-8")+b"69870").hexdigest() == "3c374d1e3da1cbd8cdef7d942941642f04285685", "boolean value of 'opacity' in marathon_scatter.mark.to_dict() is not correct"
# Whether the x and y axis titles are plain strings
assert sha1(str(type(isinstance(marathon_scatter.encoding.x['title'], str))).encode("utf-8")+b"69871").hexdigest() == "e02db54bd87fbf96616612a56f8acf8cf441d515", "type of isinstance(marathon_scatter.encoding.x['title'], str) is not bool. isinstance(marathon_scatter.encoding.x['title'], str) should be a bool"
assert sha1(str(isinstance(marathon_scatter.encoding.x['title'], str)).encode("utf-8")+b"69871").hexdigest() == "acd2e62da16b691eb93e82f53cbf50df9822ab62", "boolean value of isinstance(marathon_scatter.encoding.x['title'], str) is not correct"
assert sha1(str(type(isinstance(marathon_scatter.encoding.y['title'], str))).encode("utf-8")+b"69872").hexdigest() == "8295031153ea95799755569e0f06ae54b3f3b979", "type of isinstance(marathon_scatter.encoding.y['title'], str) is not bool. isinstance(marathon_scatter.encoding.y['title'], str) should be a bool"
assert sha1(str(isinstance(marathon_scatter.encoding.y['title'], str)).encode("utf-8")+b"69872").hexdigest() == "db51f97f3427306d46b9eb73b81474c0e722baa9", "boolean value of isinstance(marathon_scatter.encoding.y['title'], str) is not correct"
print('Success!')
Success!
Question 1.3
{points: 1}
Now that we have looked at our training data, the next step is to build a linear regression model.
Instead of using the KNeighborsRegressor function, we will be using the LinearRegression function to let scikit-learn know we want to perform a linear regression.
Assign your answer to an object named lm.
# lm = _____()
### BEGIN SOLUTION
# Create the linear regression model object with its default settings (not yet fitted).
lm = LinearRegression()
### END SOLUTION
lm
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
from hashlib import sha1
# Autograder checks for Question 1.3: salted SHA-1 digests of `lm is None`
# and of the type of `lm`, compared with stored digests.
assert sha1(str(type(lm is None)).encode("utf-8")+b"72c66").hexdigest() == "e97b0813818ca802348d6ece9741f60284f4097b", "type of lm is None is not bool. lm is None should be a bool"
assert sha1(str(lm is None).encode("utf-8")+b"72c66").hexdigest() == "3ce1f42f1c88457c878430a27cbeb6f909b0785c", "boolean value of lm is None is not correct"
assert sha1(str(type(type(lm))).encode("utf-8")+b"72c67").hexdigest() == "1d32ce0e00e2e0a8dd67995b0fad71c00083aa05", "type of type(lm) is not correct"
assert sha1(str(type(lm)).encode("utf-8")+b"72c67").hexdigest() == "c91e4bde80d303487c49aae5c34b1f381a58f9c1", "value of type(lm) is not correct"
print('Success!')
Success!
Question 1.3.1
{points: 1}
After we have created our linear regression model, the next step is to fit the training dataset.
Assign your answer to an object named lm_fit.
# ___ = ___.fit(___, ___)
### BEGIN SOLUTION
# Fit on the training split only. NOTE: scikit-learn's fit() returns the
# estimator itself, so lm_fit and lm refer to the same fitted object.
lm_fit = lm.fit(X_train, y_train)
### END SOLUTION
lm_fit
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
# NOTE(review): looks like a leftover scratch cell. It evaluates the salted
# SHA-1 digest of type(lm_fit.intercept_); the quoted hex string fused onto the
# end of the line appears to be the cell's rendered output. None of the visible
# asserts for this question check intercept_ — confirm whether a check is
# missing or this cell can be removed.
sha1(str(type(lm_fit.intercept_)).encode("utf-8")+b"ad3bf").hexdigest()'6823fdcd989c2ea0c1c36707732f9cba4a6b0553'
from hashlib import sha1
# Autograder checks for Question 1.3.1: salted SHA-1 digests compared with
# stored digests.
# `lm_fit is None` and the type of `lm_fit`
assert sha1(str(type(lm_fit is None)).encode("utf-8")+b"ad3bc").hexdigest() == "1def08aec5fd67069ff460655ea29adfcd0cb743", "type of lm_fit is None is not bool. lm_fit is None should be a bool"
assert sha1(str(lm_fit is None).encode("utf-8")+b"ad3bc").hexdigest() == "fa8f0226d403f32ba484b227d8cc4455061bc019", "boolean value of lm_fit is None is not correct"
assert sha1(str(type(type(lm_fit))).encode("utf-8")+b"ad3bd").hexdigest() == "4366e1fb6059f520d285ba77171d1b1744923f34", "type of type(lm_fit) is not correct"
assert sha1(str(type(lm_fit)).encode("utf-8")+b"ad3bd").hexdigest() == "86f605051db98874d8cf0eb9c0db6fd94496091f", "value of type(lm_fit) is not correct"
# Fitted coefficient array: the digest is taken over its unrounded str() form
assert sha1(str(type(lm_fit.coef_)).encode("utf-8")+b"ad3be").hexdigest() == "228ee7178527d0f2e5cff2853156700111ed8d69", "type of lm_fit.coef_ is not correct"
assert sha1(str(lm_fit.coef_).encode("utf-8")+b"ad3be").hexdigest() == "dfde7db1fb226af0937f0a18a269f587a6be23e6", "value of lm_fit.coef_ is not correct"
print('Success!')
Success!
Question 1.4
{points: 1}
Now, let’s visualize the model predictions as a straight line overlaid on the training data. Use the predict function of lm to create predictions for the marathon_training data. Then, add the column of predictions to the marathon_training data frame using the assign function. Name the resulting data frame marathon_preds and the new column predictions.
Next, create a scatterplot with the marathon time (y-axis) against the maximum distance run per week (x-axis) from marathon_preds. Use mark_circle with an opacity of 0.4 to avoid overplotting. Assign your plot to a variable called marathon_plot. Plot the predictions as a black line over the data points. Remember the fundamentals of effective visualizations, such as having human-readable axis titles.
Name your plot marathon_plot.
# marathon_preds = ____.assign(
# predictions= _____.predict(____)
# )
# scatterplot = alt.Chart(marathon_preds).mark_circle(opacity=0.4).encode(
# x=alt.X("max").title("Max Distance Ran per Week During Training (miles)"),
# y=alt.Y("time_hrs")
# .title("Race Time (hours)")
# .scale(zero=False)
# )
# marathon_plot = scatterplot + scatterplot.mark_line(color='black').encode(
# y="predictions"
# )
### BEGIN SOLUTION
# Attach the model's predictions for the training rows as a new column.
marathon_preds = marathon_training.assign(predictions=lm.predict(X_train))

# Base layer: semi-transparent circles of observed race time against weekly
# maximum training distance.
scatterplot = (
    alt.Chart(marathon_preds)
    .mark_circle(opacity=0.4)
    .encode(
        x=alt.X("max", title="Max Distance Ran per Week During Training (miles)"),
        y=alt.Y("time_hrs", title="Race Time (hours)").scale(zero=False),
    )
)

# Second layer: reuse the scatterplot's data and x encoding, but draw the
# predictions column as a black line, then layer it over the points.
marathon_plot = alt.layer(
    scatterplot,
    scatterplot.mark_line(color="black").encode(y="predictions"),
)
### END SOLUTION
marathon_plot
from hashlib import sha1
assert sha1(str(type(marathon_preds is None)).encode("utf-8")+b"e4358").hexdigest() == "bc2dd580b40ad4a5e8f6594b34d9ed807fc56664", "type of marathon_preds is None is not bool. marathon_preds is None should be a bool"
assert sha1(str(marathon_preds is None).encode("utf-8")+b"e4358").hexdigest() == "02df6a309e4c0c99b29a5ebbbf4dc40df215e117", "boolean value of marathon_preds is None is not correct"
assert sha1(str(type(marathon_preds)).encode("utf-8")+b"e4359").hexdigest() == "65a340844bf360cea4cd1679e420c55d09e7829e", "type of type(marathon_preds) is not correct"
assert sha1(str(type(marathon_preds.shape)).encode("utf-8")+b"e435a").hexdigest() == "94b8396ef806348aa6e6d9b48d968db839e0de43", "type of marathon_preds.shape is not tuple. marathon_preds.shape should be a tuple"
assert sha1(str(len(marathon_preds.shape)).encode("utf-8")+b"e435a").hexdigest() == "80b67edab105b410e9f659d46edd8aafd252ea78", "length of marathon_preds.shape is not correct"
assert sha1(str(sorted(map(str, marathon_preds.shape))).encode("utf-8")+b"e435a").hexdigest() == "c199087afcc62232805f8ca370d7d5302e4062f1", "values of marathon_preds.shape are not correct"
assert sha1(str(marathon_preds.shape).encode("utf-8")+b"e435a").hexdigest() == "75a932b02b8b81bcd148774ce58e3466adaa9909", "order of elements of marathon_preds.shape is not correct"
assert sha1(str(type("predictions" in marathon_preds.columns)).encode("utf-8")+b"e435b").hexdigest() == "7b0e6ae601f0ebf430c4e607354a84912f34ebc0", "type of \"predictions\" in marathon_preds.columns is not bool. \"predictions\" in marathon_preds.columns should be a bool"
assert sha1(str("predictions" in marathon_preds.columns).encode("utf-8")+b"e435b").hexdigest() == "d146815eae7ededa977915471ab4ffbe2dd98791", "boolean value of \"predictions\" in marathon_preds.columns is not correct"
assert sha1(str(type(sum(marathon_preds.predictions))).encode("utf-8")+b"e435c").hexdigest() == "3531d0a530b71a74d474021791451f589648ab04", "type of sum(marathon_preds.predictions) is not float. Please make sure it is float and not np.float64, etc. You can cast your value into a float using float()"
assert sha1(str(round(sum(marathon_preds.predictions), 2)).encode("utf-8")+b"e435c").hexdigest() == "6c69e3cb1469494214b9c3c30df43792dcd4283e", "value of sum(marathon_preds.predictions) is not correct (rounded to 2 decimal places)"
assert sha1(str(type(sum(marathon_preds.time_hrs))).encode("utf-8")+b"e435d").hexdigest() == "f6d32fcdfb7d338ca64c10d130c219848d79c187", "type of sum(marathon_preds.time_hrs) is not float. Please make sure it is float and not np.float64, etc. You can cast your value into a float using float()"
assert sha1(str(round(sum(marathon_preds.time_hrs), 2)).encode("utf-8")+b"e435d").hexdigest() == "2f02cb1ab8ae3a31440a49ce11b661ff8fe9147e", "value of sum(marathon_preds.time_hrs) is not correct (rounded to 2 decimal places)"
assert sha1(str(type(marathon_plot is None)).encode("utf-8")+b"e435e").hexdigest() == "6895d3cddb99e4147ba45a6c81bc4ddc364e23c2", "type of marathon_plot is None is not bool. marathon_plot is None should be a bool"
assert sha1(str(marathon_plot is None).encode("utf-8")+b"e435e").hexdigest() == "5390df15c8c3f55e9daedb39794d57638e99efee", "boolean value of marathon_plot is None is not correct"
assert sha1(str(type(len(marathon_plot.layer))).encode("utf-8")+b"e435f").hexdigest() == "e97a9a1e7ad3c233095b4ba0008121bc6a74261d", "type of len(marathon_plot.layer) is not int. Please make sure it is int and not np.int64, etc. You can cast your value into an int using int()"
assert sha1(str(len(marathon_plot.layer)).encode("utf-8")+b"e435f").hexdigest() == "2b20945255b0f2a7695411e709eafe5b6b888d2f", "value of len(marathon_plot.layer) is not correct"
assert sha1(str(type(marathon_plot.layer[0].mark)).encode("utf-8")+b"e4360").hexdigest() == "94d874e6d384f6f20dee36b57d02ee0e9d835e2d", "type of marathon_plot.layer[0].mark is not correct"
assert sha1(str(marathon_plot.layer[0].mark).encode("utf-8")+b"e4360").hexdigest() == "a944ff98b1e615df7fa19be2866a054377367251", "value of marathon_plot.layer[0].mark is not correct"
assert sha1(str(type(marathon_plot.layer[1].mark)).encode("utf-8")+b"e4361").hexdigest() == "71a33d1d974bc8658efed76d2ca77308b690d2fd", "type of marathon_plot.layer[1].mark is not correct"
assert sha1(str(marathon_plot.layer[1].mark).encode("utf-8")+b"e4361").hexdigest() == "a277cc73edf5fe1ef0dbef797094351e1114f778", "value of marathon_plot.layer[1].mark is not correct"
assert sha1(str(type(marathon_plot.layer[0].encoding.x['shorthand'])).encode("utf-8")+b"e4362").hexdigest() == "d8ab0ac01648e3309ae4d80e94515a5364bb5499", "type of marathon_plot.layer[0].encoding.x['shorthand'] is not str. marathon_plot.layer[0].encoding.x['shorthand'] should be an str"
assert sha1(str(len(marathon_plot.layer[0].encoding.x['shorthand'])).encode("utf-8")+b"e4362").hexdigest() == "839d138b1eeed4328365ef5af63e42c0910bb888", "length of marathon_plot.layer[0].encoding.x['shorthand'] is not correct"
assert sha1(str(marathon_plot.layer[0].encoding.x['shorthand'].lower()).encode("utf-8")+b"e4362").hexdigest() == "c6293c84c9e09e66386aeb65a91fcd8df6601e09", "value of marathon_plot.layer[0].encoding.x['shorthand'] is not correct"
assert sha1(str(marathon_plot.layer[0].encoding.x['shorthand']).encode("utf-8")+b"e4362").hexdigest() == "c6293c84c9e09e66386aeb65a91fcd8df6601e09", "correct string value of marathon_plot.layer[0].encoding.x['shorthand'] but incorrect case of letters"
assert sha1(str(type(marathon_plot.layer[0].encoding.y['shorthand'])).encode("utf-8")+b"e4363").hexdigest() == "3fd3774342c69f8858f9acea94a03c9890a18a93", "type of marathon_plot.layer[0].encoding.y['shorthand'] is not str. marathon_plot.layer[0].encoding.y['shorthand'] should be an str"
assert sha1(str(len(marathon_plot.layer[0].encoding.y['shorthand'])).encode("utf-8")+b"e4363").hexdigest() == "c5b64aeb2742f503eb3854087c4f836b06acede1", "length of marathon_plot.layer[0].encoding.y['shorthand'] is not correct"
assert sha1(str(marathon_plot.layer[0].encoding.y['shorthand'].lower()).encode("utf-8")+b"e4363").hexdigest() == "06f9ff5b96ee6e75cd8cf966d5552fe7f717f260", "value of marathon_plot.layer[0].encoding.y['shorthand'] is not correct"
assert sha1(str(marathon_plot.layer[0].encoding.y['shorthand']).encode("utf-8")+b"e4363").hexdigest() == "06f9ff5b96ee6e75cd8cf966d5552fe7f717f260", "correct string value of marathon_plot.layer[0].encoding.y['shorthand'] but incorrect case of letters"
assert sha1(str(type(marathon_plot.layer[1].encoding.y['shorthand'])).encode("utf-8")+b"e4364").hexdigest() == "14b5ec8e3ca9e9418bdbbaa77a9b3aa179999fe9", "type of marathon_plot.layer[1].encoding.y['shorthand'] is not str. marathon_plot.layer[1].encoding.y['shorthand'] should be an str"
assert sha1(str(len(marathon_plot.layer[1].encoding.y['shorthand'])).encode("utf-8")+b"e4364").hexdigest() == "6f6fc84ebe98dc81fb989889721610b969b2e700", "length of marathon_plot.layer[1].encoding.y['shorthand'] is not correct"
assert sha1(str(marathon_plot.layer[1].encoding.y['shorthand'].lower()).encode("utf-8")+b"e4364").hexdigest() == "6ae5bc9df9fd334f0eac6345e13a01b9dd019844", "value of marathon_plot.layer[1].encoding.y['shorthand'] is not correct"
assert sha1(str(marathon_plot.layer[1].encoding.y['shorthand']).encode("utf-8")+b"e4364").hexdigest() == "6ae5bc9df9fd334f0eac6345e13a01b9dd019844", "correct string value of marathon_plot.layer[1].encoding.y['shorthand'] but incorrect case of letters"
assert sha1(str(type(isinstance(marathon_plot.layer[0].encoding.x['title'], str))).encode("utf-8")+b"e4365").hexdigest() == "6d1dfcd98beb4541c755ed9ee5b65dbd667070f0", "type of isinstance(marathon_plot.layer[0].encoding.x['title'], str) is not bool. isinstance(marathon_plot.layer[0].encoding.x['title'], str) should be a bool"
assert sha1(str(isinstance(marathon_plot.layer[0].encoding.x['title'], str)).encode("utf-8")+b"e4365").hexdigest() == "7b15eabe489e4ffa5a44043c97d017234a7e0a0a", "boolean value of isinstance(marathon_plot.layer[0].encoding.x['title'], str) is not correct"
assert sha1(str(type(isinstance(marathon_plot.layer[0].encoding.y['title'], str))).encode("utf-8")+b"e4366").hexdigest() == "f0651e9d8742d2b0750aea688fcf11ea2e3383f8", "type of isinstance(marathon_plot.layer[0].encoding.y['title'], str) is not bool. isinstance(marathon_plot.layer[0].encoding.y['title'], str) should be a bool"
assert sha1(str(isinstance(marathon_plot.layer[0].encoding.y['title'], str)).encode("utf-8")+b"e4366").hexdigest() == "0e91055a5f12d74e8593f359e143862b6a8fc193", "boolean value of isinstance(marathon_plot.layer[0].encoding.y['title'], str) is not correct"
print('Success!')Success!
Question 1.5
{points: 1}
Great! We can now see the line of best fit on the graph. Now let’s calculate the RMSPE using the test data. To do this, first use the fitted linear regression model (lm_fit) to make predictions on the test data. Then, add the column of predictions to the marathon_testing data frame using the assign method. Name the resulting data frame test_preds and the new column predictions.
Afterwards, calculate the RMSPE by passing the observed and predicted race times to the mean_squared_error function and taking the square root of the result.
Assign the RMSPE score to an object called lm_rmspe.
# ___ = ___.assign(
# predictions=___.predict(___)
# )
# ___ = ___(___, ___)**(1/2)
### BEGIN SOLUTION
# Attach the model's predictions for X_test to the testing data frame as a new column.
test_preds = marathon_testing.assign(predictions=lm_fit.predict(X_test))
# RMSPE: square root of the mean squared error between observed and predicted times.
lm_rmspe = mean_squared_error(
    y_true=test_preds["time_hrs"],
    y_pred=test_preds["predictions"],
) ** 0.5
### END SOLUTION
lm_rmspe0.625341986303691
# Autograder checks for Question 1.5.
# Each assertion converts a property of the student's objects to a string, appends a
# per-check salt, hashes it with SHA-1 and compares the digest with a stored value
# (presumably so the expected answers are not readable from the test cell).
# NOTE(review): the rendered output that follows this cell reports failing/extra checks
# on `lm_rmspe is None` and `type(lm_rmspe)` that are not listed here, and a different
# digest for `type(round(lm_rmspe, 1))` -- confirm cell source and digests are in sync.
from hashlib import sha1
# test_preds must be defined (the `is None` check) and have the expected type.
assert sha1(str(type(test_preds is None)).encode("utf-8")+b"3ebdc").hexdigest() == "29ebee7ab2061555cdd2621507b3792dbb886297", "type of test_preds is None is not bool. test_preds is None should be a bool"
assert sha1(str(test_preds is None).encode("utf-8")+b"3ebdc").hexdigest() == "6a7c108f74c9bf5ddfe1618a204e71dd5069e8c5", "boolean value of test_preds is None is not correct"
assert sha1(str(type(test_preds)).encode("utf-8")+b"3ebdd").hexdigest() == "763191835b51c258b842454c271700f9cdfea3bf", "type of type(test_preds) is not correct"
# Shape of test_preds: type, length, values (order-insensitive), then exact order.
assert sha1(str(type(test_preds.shape)).encode("utf-8")+b"3ebde").hexdigest() == "89d3f1293bc4be750513c8cdb8c60056333d8f96", "type of test_preds.shape is not tuple. test_preds.shape should be a tuple"
assert sha1(str(len(test_preds.shape)).encode("utf-8")+b"3ebde").hexdigest() == "bc721a860c2268ce910d4702796d17031d8ea8a8", "length of test_preds.shape is not correct"
assert sha1(str(sorted(map(str, test_preds.shape))).encode("utf-8")+b"3ebde").hexdigest() == "1a83b659bfa801824e9221c8426149a4b5fa861b", "values of test_preds.shape are not correct"
assert sha1(str(test_preds.shape).encode("utf-8")+b"3ebde").hexdigest() == "bd984a5c94d60104a3e575235235426ccfbc98c7", "order of elements of test_preds.shape is not correct"
# The predictions column is checked through its sum, rounded to 2 decimal places.
assert sha1(str(type(sum(test_preds.predictions))).encode("utf-8")+b"3ebdf").hexdigest() == "a072e028d9a3b80e8f68ddb270a40eb499596870", "type of sum(test_preds.predictions) is not float. Please make sure it is float and not np.float64, etc. You can cast your value into a float using float()"
assert sha1(str(round(sum(test_preds.predictions), 2)).encode("utf-8")+b"3ebdf").hexdigest() == "71c296aa4d1218d1231b125f36f9a6c915f67cf1", "value of sum(test_preds.predictions) is not correct (rounded to 2 decimal places)"
# lm_rmspe is compared after rounding to 1 decimal place (type and value).
assert sha1(str(type(round(lm_rmspe, 1))).encode("utf-8")+b"3ebe2").hexdigest() == "e566caf8a3b27dc42c29f121d257bc3db3506298", "type of round(lm_rmspe, 1) is not correct"
assert sha1(str(round(lm_rmspe, 1)).encode("utf-8")+b"3ebe2").hexdigest() == "a6d4449e0d5e3251aa0d7523dcbda18946bf141d", "value of round(lm_rmspe, 1) is not correct"
print('Success!')--------------------------------------------------------------------------- AssertionError Traceback (most recent call last) Cell In[16], line 18 15 assert sha1(str(type(lm_rmspe is None)).encode("utf-8")+b"3ebe0").hexdigest() == "dd5ba914f2ed74bf83a35c83506ab2ef26e21ed8", "type of lm_rmspe is None is not bool. lm_rmspe is None should be a bool" 16 assert sha1(str(lm_rmspe is None).encode("utf-8")+b"3ebe0").hexdigest() == "750b26eac59f5780869eb7f85afe415614161310", "boolean value of lm_rmspe is None is not correct" ---> 18 assert sha1(str(type(lm_rmspe)).encode("utf-8")+b"3ebe1").hexdigest() == "21a6df5760ffaceab2771ac0da61c372b6ed6d25", "type of type(lm_rmspe) is not correct" 20 assert sha1(str(type(round(lm_rmspe, 1))).encode("utf-8")+b"3ebe2").hexdigest() == "9a9ddc39cbb7a45670e4d1dd2b4ad91714ad8852", "type of round(lm_rmspe, 1) is not correct" 21 assert sha1(str(round(lm_rmspe, 1)).encode("utf-8")+b"3ebe2").hexdigest() == "a6d4449e0d5e3251aa0d7523dcbda18946bf141d", "value of round(lm_rmspe, 1) is not correct" AssertionError: type of type(lm_rmspe) is not correct
Question 1.5.1
{points: 1}
Now, let’s visualize the model predictions as a straight line overlaid on the test data. First, create a scatterplot to assess the relationship between race time (time_hrs) and maximum distance run per week during training (max) on the testing data. Use mark_circle with an opacity of 0.4 to reduce overplotting. Then add a line to the plot corresponding to the predictions (predictions) from the fitted linear regression model. Remember to do whatever is necessary to make this an effective visualization.
Assign the plot to an object called marathon_plot_test.
# marathon_plot_test = ___
### BEGIN SOLUTION
# Base layer: semi-transparent circles of time_hrs against max from test_preds, with
# human-readable axis titles; scale(zero=False) is set on the y channel only.
scatterplot_test = alt.Chart(test_preds).mark_circle(opacity=0.4).encode(
x=alt.X("max").title("Max Distance Ran per Week During Training (miles)"),
y=alt.Y("time_hrs")
.title("Race Time (hours)")
.scale(zero=False)
)
# Line layer: derived from the scatterplot chart, switching the mark to a black line
# and the y field to the "predictions" column; `+` layers it on top of the scatterplot.
marathon_plot_test = scatterplot_test + scatterplot_test.mark_line(color='black').encode(
y="predictions"
)
# Another approach would have been to only change the data of the plot we made previously
# marathon_plot_test = marathon_plot.properties(data=test_preds)
### END SOLUTION
marathon_plot_testfrom hashlib import sha1
# Autograder checks for Question 1.5.1.
# Each assertion hashes (SHA-1, with a per-check salt) the string form of a property of
# marathon_plot_test and compares it with a stored digest.
# The plot object must be defined (the `is None` check).
assert sha1(str(type(marathon_plot_test is None)).encode("utf-8")+b"61e8f").hexdigest() == "ee748f5ffeeccfa151a95c1c8e40fb2e51b1ce9f", "type of marathon_plot_test is None is not bool. marathon_plot_test is None should be a bool"
assert sha1(str(marathon_plot_test is None).encode("utf-8")+b"61e8f").hexdigest() == "f71b2edba5243e8d1a90ce0d7e7b37557a0f0052", "boolean value of marathon_plot_test is None is not correct"
# Number of layers in the layered chart.
assert sha1(str(type(len(marathon_plot_test.layer))).encode("utf-8")+b"61e90").hexdigest() == "6d39d35690606462cf06c5a978d8b727d3090082", "type of len(marathon_plot_test.layer) is not int. Please make sure it is int and not np.int64, etc. You can cast your value into an int using int()"
assert sha1(str(len(marathon_plot_test.layer)).encode("utf-8")+b"61e90").hexdigest() == "421355b186989ec09a7f6e161cda4600cd1021bb", "value of len(marathon_plot_test.layer) is not correct"
# Mark definitions of layer 0 and layer 1 (type and string representation).
assert sha1(str(type(marathon_plot_test.layer[0].mark)).encode("utf-8")+b"61e91").hexdigest() == "3246755d74b7833a9ea578910cdf8696d3fce524", "type of marathon_plot_test.layer[0].mark is not correct"
assert sha1(str(marathon_plot_test.layer[0].mark).encode("utf-8")+b"61e91").hexdigest() == "bf51585cca690c54f425c1a947e05ea8faa39710", "value of marathon_plot_test.layer[0].mark is not correct"
assert sha1(str(type(marathon_plot_test.layer[1].mark)).encode("utf-8")+b"61e92").hexdigest() == "fed14b1460cbd668b78fe2a24b76c721a7b6ef35", "type of marathon_plot_test.layer[1].mark is not correct"
assert sha1(str(marathon_plot_test.layer[1].mark).encode("utf-8")+b"61e92").hexdigest() == "7c3f41c9edb9129aef9b6afbb5afbd53b5a21bc3", "value of marathon_plot_test.layer[1].mark is not correct"
# x shorthand of layer 0: type, length, case-insensitive value, then exact value.
assert sha1(str(type(marathon_plot_test.layer[0].encoding.x['shorthand'])).encode("utf-8")+b"61e93").hexdigest() == "b36d5e994a7b12c3f08e6d34610ef7745b04a68b", "type of marathon_plot_test.layer[0].encoding.x['shorthand'] is not str. marathon_plot_test.layer[0].encoding.x['shorthand'] should be an str"
assert sha1(str(len(marathon_plot_test.layer[0].encoding.x['shorthand'])).encode("utf-8")+b"61e93").hexdigest() == "6ab52032148d76cb4c992137ba98a5642ccd1c07", "length of marathon_plot_test.layer[0].encoding.x['shorthand'] is not correct"
assert sha1(str(marathon_plot_test.layer[0].encoding.x['shorthand'].lower()).encode("utf-8")+b"61e93").hexdigest() == "f5d25df889e84991486b73bd90133cc33805a2c7", "value of marathon_plot_test.layer[0].encoding.x['shorthand'] is not correct"
assert sha1(str(marathon_plot_test.layer[0].encoding.x['shorthand']).encode("utf-8")+b"61e93").hexdigest() == "f5d25df889e84991486b73bd90133cc33805a2c7", "correct string value of marathon_plot_test.layer[0].encoding.x['shorthand'] but incorrect case of letters"
# y shorthand of layer 0, checked the same way.
assert sha1(str(type(marathon_plot_test.layer[0].encoding.y['shorthand'])).encode("utf-8")+b"61e94").hexdigest() == "ddef04a4b677f36a2461ae87bd64c145cbe414e7", "type of marathon_plot_test.layer[0].encoding.y['shorthand'] is not str. marathon_plot_test.layer[0].encoding.y['shorthand'] should be an str"
assert sha1(str(len(marathon_plot_test.layer[0].encoding.y['shorthand'])).encode("utf-8")+b"61e94").hexdigest() == "d12e59210898a0cbdd59886ed5b66fe72ef419a5", "length of marathon_plot_test.layer[0].encoding.y['shorthand'] is not correct"
assert sha1(str(marathon_plot_test.layer[0].encoding.y['shorthand'].lower()).encode("utf-8")+b"61e94").hexdigest() == "0528e9c8beb5841dd4f6ea3e120aa2ea6851f6d3", "value of marathon_plot_test.layer[0].encoding.y['shorthand'] is not correct"
assert sha1(str(marathon_plot_test.layer[0].encoding.y['shorthand']).encode("utf-8")+b"61e94").hexdigest() == "0528e9c8beb5841dd4f6ea3e120aa2ea6851f6d3", "correct string value of marathon_plot_test.layer[0].encoding.y['shorthand'] but incorrect case of letters"
# y shorthand of layer 1 (the line layer in the reference solution).
assert sha1(str(type(marathon_plot_test.layer[1].encoding.y['shorthand'])).encode("utf-8")+b"61e95").hexdigest() == "393f83ca4126abac28e3721499e74cbc8eec49f5", "type of marathon_plot_test.layer[1].encoding.y['shorthand'] is not str. marathon_plot_test.layer[1].encoding.y['shorthand'] should be an str"
assert sha1(str(len(marathon_plot_test.layer[1].encoding.y['shorthand'])).encode("utf-8")+b"61e95").hexdigest() == "cf6648e5023cea85b35e732f848817cbef50a2ea", "length of marathon_plot_test.layer[1].encoding.y['shorthand'] is not correct"
assert sha1(str(marathon_plot_test.layer[1].encoding.y['shorthand'].lower()).encode("utf-8")+b"61e95").hexdigest() == "482407e92cdb21525edc25dc9759ecd8f28ac4b0", "value of marathon_plot_test.layer[1].encoding.y['shorthand'] is not correct"
assert sha1(str(marathon_plot_test.layer[1].encoding.y['shorthand']).encode("utf-8")+b"61e95").hexdigest() == "482407e92cdb21525edc25dc9759ecd8f28ac4b0", "correct string value of marathon_plot_test.layer[1].encoding.y['shorthand'] but incorrect case of letters"
# Whether the x and y titles of layer 0 are strings (presumably expected to be True,
# i.e. explicit axis titles were set -- the expected boolean is only stored as a digest).
assert sha1(str(type(isinstance(marathon_plot_test.layer[0].encoding.x['title'], str))).encode("utf-8")+b"61e96").hexdigest() == "4e9a552197c0d9f091c287c51186481faa373b84", "type of isinstance(marathon_plot_test.layer[0].encoding.x['title'], str) is not bool. isinstance(marathon_plot_test.layer[0].encoding.x['title'], str) should be a bool"
assert sha1(str(isinstance(marathon_plot_test.layer[0].encoding.x['title'], str)).encode("utf-8")+b"61e96").hexdigest() == "b5a959a717c4d36b796c0351445d67ceba59555b", "boolean value of isinstance(marathon_plot_test.layer[0].encoding.x['title'], str) is not correct"
assert sha1(str(type(isinstance(marathon_plot_test.layer[0].encoding.y['title'], str))).encode("utf-8")+b"61e97").hexdigest() == "69d55630d9b08b127c64a06a01079ec5fd719e2c", "type of isinstance(marathon_plot_test.layer[0].encoding.y['title'], str) is not bool. isinstance(marathon_plot_test.layer[0].encoding.y['title'], str) should be a bool"
assert sha1(str(isinstance(marathon_plot_test.layer[0].encoding.y['title'], str)).encode("utf-8")+b"61e97").hexdigest() == "9669df5114cc0166cef2d5b5f35d9333a937d24f", "boolean value of isinstance(marathon_plot_test.layer[0].encoding.y['title'], str) is not correct"
print('Success!')
Given that the linear regression model is a straight line, we can write our model as a mathematical equation. We can get the two numbers we need for this (the slope and the intercept) from the coef_ and intercept_ attributes of lm_fit.
# run this cell
# Report the first entry of lm_fit.coef_ (the coefficient of the model's first
# predictor), formatted to three decimal places.
print(f"The coefficient for the linear regression is {lm_fit.coef_[0]:0.3f}.")
print(f"The intercept for the linear regression is {lm_fit.intercept_:0.3f}.")
Question 1.6
{points: 1}
Which of the following mathematical equations represents the model based on the numbers output in the cell above?
A. \(Predicted \ race \ time \ (in \ hours) = 4.851 - 0.022 \times max \ (in \ miles)\)
B. \(Predicted \ race \ time \ (in \ hours) = -0.022 + 4.851 \times max \ (in \ miles)\)
C. \(Predicted \ max \ (in \ miles) = 4.851 - 0.022 \times \ race \ time \ (in \ hours)\)
D. \(Predicted \ max \ (in \ miles) = -0.022 + 4.851 \times \ race \ time \ (in \ hours)\)
Save the letter of your answer to a variable named answer1_6. Make sure you put quotation marks around the letter and pay attention to case.
### BEGIN SOLUTION
# The model predicts race time (time_hrs) from max, so the equation has the form
# race time = intercept_ + coef_[0] * max. Option A is the one with that form,
# assuming the cell above printed 4.851 for the intercept and -0.022 for the
# coefficient (its output is not shown here -- verify against the run).
answer1_6 = "A"
### END SOLUTIONfrom hashlib import sha1
# Autograder checks for Question 1.6: salted SHA-1 digests of the answer's type, its
# length, its lower-cased value, and finally its exact (case-sensitive) value.
assert sha1(str(type(answer1_6)).encode("utf-8")+b"5b6e9").hexdigest() == "c1e898538331c6feb698ad544c8e708eb101f335", "type of answer1_6 is not str. answer1_6 should be an str"
assert sha1(str(len(answer1_6)).encode("utf-8")+b"5b6e9").hexdigest() == "4f4756a152ba2c274ee3b194dbe295ac57e849ac", "length of answer1_6 is not correct"
assert sha1(str(answer1_6.lower()).encode("utf-8")+b"5b6e9").hexdigest() == "11099d2bab2f6a29a949f290697d5e5906cece6b", "value of answer1_6 is not correct"
assert sha1(str(answer1_6).encode("utf-8")+b"5b6e9").hexdigest() == "8a5fd0b1b4b71ea33d160ad2d610f6af5c67f963", "correct string value of answer1_6 but incorrect case of letters"
# Reached only if every assertion above passed.
print('Success!')